import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from scipy.stats import zscore
from sklearn.tree import DecisionTreeClassifier
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy.stats import shapiro
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_val_score, KFold,StratifiedKFold, LeaveOneOut
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from scipy.stats import randint
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pickle
from sklearn.ensemble import BaggingClassifier,AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
# !pip install
# !pip install mxnet-mkl==1.6.0 numpy==1.23.1
# ! pip install imblearn
Q1 A - Import ‘signal-data.csv’ as DataFrame.
# Load the raw signal dataset into a DataFrame and preview its first rows.
csv_path = "/workspaces/GreatLearning/Featurization, Model Selection & Tuning/Project/signal-data.csv"
data = pd.read_csv(csv_path)
data.head()
| Time | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ... | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | Pass/Fail | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2008-07-19 11:55:00 | 3030.93 | 2564.00 | 2187.7333 | 1411.1265 | 1.3602 | 100.0 | 97.6133 | 0.1242 | 1.5005 | ... | NaN | 0.5005 | 0.0118 | 0.0035 | 2.3630 | NaN | NaN | NaN | NaN | -1 |
| 1 | 2008-07-19 12:32:00 | 3095.78 | 2465.14 | 2230.4222 | 1463.6606 | 0.8294 | 100.0 | 102.3433 | 0.1247 | 1.4966 | ... | 208.2045 | 0.5019 | 0.0223 | 0.0055 | 4.4447 | 0.0096 | 0.0201 | 0.0060 | 208.2045 | -1 |
| 2 | 2008-07-19 13:17:00 | 2932.61 | 2559.94 | 2186.4111 | 1698.0172 | 1.5102 | 100.0 | 95.4878 | 0.1241 | 1.4436 | ... | 82.8602 | 0.4958 | 0.0157 | 0.0039 | 3.1745 | 0.0584 | 0.0484 | 0.0148 | 82.8602 | 1 |
| 3 | 2008-07-19 14:43:00 | 2988.72 | 2479.90 | 2199.0333 | 909.7926 | 1.3204 | 100.0 | 104.2367 | 0.1217 | 1.4882 | ... | 73.8432 | 0.4990 | 0.0103 | 0.0025 | 2.0544 | 0.0202 | 0.0149 | 0.0044 | 73.8432 | -1 |
| 4 | 2008-07-19 15:22:00 | 3032.24 | 2502.87 | 2233.3667 | 1326.5200 | 1.5334 | 100.0 | 100.3967 | 0.1235 | 1.5031 | ... | NaN | 0.4800 | 0.4766 | 0.1045 | 99.3032 | 0.0202 | 0.0149 | 0.0044 | 73.8432 | -1 |
5 rows × 592 columns
Q1 B - Print 5 point summary and share at least 2 observations.
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | Pass/Fail | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1561.000000 | 1560.000000 | 1553.000000 | 1553.000000 | 1553.000000 | 1553.0 | 1553.000000 | 1558.000000 | 1565.000000 | 1565.000000 | ... | 618.000000 | 1566.000000 | 1566.000000 | 1566.000000 | 1566.000000 | 1566.000000 | 1566.000000 | 1566.000000 | 1566.000000 | 1567.000000 |
| mean | 3014.452896 | 2495.850231 | 2200.547318 | 1396.376627 | 4.197013 | 100.0 | 101.112908 | 0.121822 | 1.462862 | -0.000841 | ... | 97.934373 | 0.500096 | 0.015318 | 0.003847 | 3.067826 | 0.021458 | 0.016475 | 0.005283 | 99.670066 | -0.867262 |
| std | 73.621787 | 80.407705 | 29.513152 | 441.691640 | 56.355540 | 0.0 | 6.237214 | 0.008961 | 0.073897 | 0.015116 | ... | 87.520966 | 0.003404 | 0.017180 | 0.003720 | 3.578033 | 0.012358 | 0.008808 | 0.002867 | 93.891919 | 0.498010 |
| min | 2743.240000 | 2158.750000 | 2060.660000 | 0.000000 | 0.681500 | 100.0 | 82.131100 | 0.000000 | 1.191000 | -0.053400 | ... | 0.000000 | 0.477800 | 0.006000 | 0.001700 | 1.197500 | -0.016900 | 0.003200 | 0.001000 | 0.000000 | -1.000000 |
| 25% | 2966.260000 | 2452.247500 | 2181.044400 | 1081.875800 | 1.017700 | 100.0 | 97.920000 | 0.121100 | 1.411200 | -0.010800 | ... | 46.184900 | 0.497900 | 0.011600 | 0.003100 | 2.306500 | 0.013425 | 0.010600 | 0.003300 | 44.368600 | -1.000000 |
| 50% | 3011.490000 | 2499.405000 | 2201.066700 | 1285.214400 | 1.316800 | 100.0 | 101.512200 | 0.122400 | 1.461600 | -0.001300 | ... | 72.288900 | 0.500200 | 0.013800 | 0.003600 | 2.757650 | 0.020500 | 0.014800 | 0.004600 | 71.900500 | -1.000000 |
| 75% | 3056.650000 | 2538.822500 | 2218.055500 | 1591.223500 | 1.525700 | 100.0 | 104.586700 | 0.123800 | 1.516900 | 0.008400 | ... | 116.539150 | 0.502375 | 0.016500 | 0.004100 | 3.295175 | 0.027600 | 0.020300 | 0.006400 | 114.749700 | -1.000000 |
| max | 3356.350000 | 2846.440000 | 2315.266700 | 3715.041700 | 1114.536600 | 100.0 | 129.252200 | 0.128600 | 1.656400 | 0.074900 | ... | 737.304800 | 0.509800 | 0.476600 | 0.104500 | 99.303200 | 0.102800 | 0.079900 | 0.028600 | 737.304800 | 1.000000 |
8 rows × 591 columns
# (rows, columns) of the raw frame.
data.shape
(1567, 592)
# Per-column dtypes of the raw frame.
data.dtypes
Time object
0 float64
1 float64
2 float64
3 float64
...
586 float64
587 float64
588 float64
589 float64
Pass/Fail int64
Length: 592, dtype: object
Observations
Column 0:
Minimum value = 2743 First Quartile (Q1) = 2966 Median (Second Quartile or Q2) = 3011 Third Quartile (Q3) = 3056 Maximum value = 3356
Column 1:
Minimum value = 2158 First Quartile (Q1) = 2452 Median (Second Quartile or Q2) = 2499 Third Quartile (Q3) = 2538 Maximum value = 2846
Column 588:
Minimum value = 0.001 First Quartile (Q1) = 0.003 Median (Second Quartile or Q2) = 0.004 Third Quartile (Q3) = 0.006 Maximum value = 0.028
Column 589:
Minimum value = 0 First Quartile (Q1) = 44 Median (Second Quartile or Q2) = 71 Third Quartile (Q3) = 114 Maximum value = 737
Q2 A - Write a for loop which will remove all the features with 20%+ Null values and impute rest with mean of the feature.
# Q2 A: drop every feature with >= 20% missing values and impute the rest
# (mode for object columns, mean for numeric columns).
NULL_THRESHOLD = 0.2
for column in list(data.columns):  # snapshot: columns are dropped inside the loop
    null_percentage = data[column].isnull().mean()
    if null_percentage >= NULL_THRESHOLD:
        data.drop(columns=column, inplace=True)
        continue
    if null_percentage == 0:
        continue  # nothing to impute
    if data[column].dtype == "O":
        # mode() returns a Series; passing it to fillna() would align on the
        # index instead of filling every gap, so take the first mode as a scalar.
        fill_value = data[column].mode().iloc[0]
    else:
        fill_value = data[column].mean()
    # Assign the result back: data[column].fillna(..., inplace=True) acts on a
    # temporary object and does not update `data` under pandas Copy-on-Write.
    data[column] = data[column].fillna(fill_value)
data.shape
(1567, 560)
Q2 B - Identify and drop the features which are having same value for all the rows.
# Q2 B: a column with exactly one distinct value is identical on every row.
is_constant = data.nunique().eq(1)
features_with_same_valuie = data.columns[is_constant]
data.drop(columns=features_with_same_valuie, inplace=True)
print('After dropping features having same value for all the rows (rows,columns) =', data.shape)
After dropping features having same value for all the rows (rows,columns) = (1567, 444)
Q2 C - Drop other features if required using relevant functional knowledge. Clearly justify the same.
# Q2 C: remove the 'Time' column in place (the only object-dtype column per data.dtypes above).
data.drop(columns=['Time'], inplace=True)
Dropping the Time column since it won't help to predict the target column.
Q2 D - Check for multi-collinearity in the data and take necessary action.
# Shape after dropping sparse, constant and 'Time' columns.
data.shape
(1567, 443)
# Preview the cleaned frame before the collinearity check.
data.head()
| 0 | 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | ... | 577 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | Pass/Fail | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3030.93 | 2564.00 | 2187.7333 | 1411.1265 | 1.3602 | 97.6133 | 0.1242 | 1.5005 | 0.0162 | -0.0034 | ... | 14.9509 | 0.5005 | 0.0118 | 0.0035 | 2.3630 | 0.021458 | 0.016475 | 0.005283 | 99.670066 | -1 |
| 1 | 3095.78 | 2465.14 | 2230.4222 | 1463.6606 | 0.8294 | 102.3433 | 0.1247 | 1.4966 | -0.0005 | -0.0148 | ... | 10.9003 | 0.5019 | 0.0223 | 0.0055 | 4.4447 | 0.009600 | 0.020100 | 0.006000 | 208.204500 | -1 |
| 2 | 2932.61 | 2559.94 | 2186.4111 | 1698.0172 | 1.5102 | 95.4878 | 0.1241 | 1.4436 | 0.0041 | 0.0013 | ... | 9.2721 | 0.4958 | 0.0157 | 0.0039 | 3.1745 | 0.058400 | 0.048400 | 0.014800 | 82.860200 | 1 |
| 3 | 2988.72 | 2479.90 | 2199.0333 | 909.7926 | 1.3204 | 104.2367 | 0.1217 | 1.4882 | -0.0124 | -0.0033 | ... | 8.5831 | 0.4990 | 0.0103 | 0.0025 | 2.0544 | 0.020200 | 0.014900 | 0.004400 | 73.843200 | -1 |
| 4 | 3032.24 | 2502.87 | 2233.3667 | 1326.5200 | 1.5334 | 100.3967 | 0.1235 | 1.5031 | -0.0031 | -0.0072 | ... | 10.9698 | 0.4800 | 0.4766 | 0.1045 | 99.3032 | 0.020200 | 0.014900 | 0.004400 | 73.843200 | -1 |
5 rows × 443 columns
# Q2 D: absolute pairwise correlations between all remaining columns
# (the target 'Pass/Fail' is still part of `data` here, so it is included).
data_matrix = data.corr().abs()
print(data_matrix)
0 1 2 3 4 6 \
0 1.000000 0.143840 0.004756 0.007613 0.011014 0.002270
1 0.143840 1.000000 0.005767 0.007568 0.001636 0.025564
2 0.004756 0.005767 1.000000 0.298935 0.095891 0.136225
3 0.007613 0.007568 0.298935 1.000000 0.058483 0.685835
4 0.011014 0.001636 0.095891 0.058483 1.000000 0.074368
... ... ... ... ... ... ...
586 0.018443 0.009403 0.025495 0.034711 0.043929 0.041209
587 0.025880 0.017266 0.029345 0.039132 0.031005 0.034027
588 0.028166 0.010118 0.030818 0.033645 0.026100 0.032227
589 0.004174 0.044797 0.032890 0.080341 0.050910 0.043777
Pass/Fail 0.025141 0.002603 0.000957 0.024623 0.013756 0.016239
7 8 9 10 ... 577 582 \
0 0.031483 0.052622 0.009045 0.006504 ... 0.008601 0.000224
1 0.012037 0.031258 0.023964 0.009645 ... 0.010145 0.043556
2 0.146213 0.023528 0.016168 0.069893 ... 0.028705 0.006023
3 0.073856 0.102892 0.068215 0.049873 ... 0.016438 0.008988
4 0.347734 0.025946 0.054206 0.006470 ... 0.004070 0.045081
... ... ... ... ... ... ... ...
586 0.058113 0.010433 0.033738 0.000327 ... 0.002684 0.016726
587 0.021426 0.022845 0.059301 0.046965 ... 0.009405 0.024473
588 0.020893 0.026250 0.060758 0.046048 ... 0.015596 0.020705
589 0.107804 0.022770 0.004880 0.008393 ... 0.024766 0.041486
Pass/Fail 0.012991 0.028016 0.031191 0.033639 ... 0.049633 0.047020
583 584 585 586 587 588 \
0 0.023453 0.019907 0.023589 0.018443 0.025880 0.028166
1 0.002904 0.001264 0.002273 0.009403 0.017266 0.010118
2 0.015697 0.018225 0.015752 0.025495 0.029345 0.030818
3 0.025436 0.024736 0.026019 0.034711 0.039132 0.033645
4 0.001300 0.001597 0.001616 0.043929 0.031005 0.026100
... ... ... ... ... ... ...
586 0.002257 0.001605 0.002743 1.000000 0.167913 0.164238
587 0.002649 0.002498 0.002930 0.167913 1.000000 0.974276
588 0.002260 0.001957 0.002530 0.164238 0.974276 1.000000
589 0.003008 0.003295 0.003800 0.486559 0.390813 0.389211
Pass/Fail 0.005981 0.005419 0.005034 0.004156 0.035391 0.031167
589 Pass/Fail
0 0.004174 0.025141
1 0.044797 0.002603
2 0.032890 0.000957
3 0.080341 0.024623
4 0.050910 0.013756
... ... ...
586 0.486559 0.004156
587 0.390813 0.035391
588 0.389211 0.031167
589 1.000000 0.002653
Pass/Fail 0.002653 1.000000
[443 rows x 443 columns]
# Keep only the strict upper triangle (k=1 excludes the diagonal) so every
# column pair is looked at once; all other cells become NaN.
upper_triangle_mask = np.triu(np.ones(data_matrix.shape), k=1).astype(bool)
upper = data_matrix.where(upper_triangle_mask)
print(upper)
0 1 2 3 4 6 7 \
0 NaN 0.14384 0.004756 0.007613 0.011014 0.002270 0.031483
1 NaN NaN 0.005767 0.007568 0.001636 0.025564 0.012037
2 NaN NaN NaN 0.298935 0.095891 0.136225 0.146213
3 NaN NaN NaN NaN 0.058483 0.685835 0.073856
4 NaN NaN NaN NaN NaN 0.074368 0.347734
... .. ... ... ... ... ... ...
586 NaN NaN NaN NaN NaN NaN NaN
587 NaN NaN NaN NaN NaN NaN NaN
588 NaN NaN NaN NaN NaN NaN NaN
589 NaN NaN NaN NaN NaN NaN NaN
Pass/Fail NaN NaN NaN NaN NaN NaN NaN
8 9 10 ... 577 582 583 \
0 0.052622 0.009045 0.006504 ... 0.008601 0.000224 0.023453
1 0.031258 0.023964 0.009645 ... 0.010145 0.043556 0.002904
2 0.023528 0.016168 0.069893 ... 0.028705 0.006023 0.015697
3 0.102892 0.068215 0.049873 ... 0.016438 0.008988 0.025436
4 0.025946 0.054206 0.006470 ... 0.004070 0.045081 0.001300
... ... ... ... ... ... ... ...
586 NaN NaN NaN ... NaN NaN NaN
587 NaN NaN NaN ... NaN NaN NaN
588 NaN NaN NaN ... NaN NaN NaN
589 NaN NaN NaN ... NaN NaN NaN
Pass/Fail NaN NaN NaN ... NaN NaN NaN
584 585 586 587 588 589 \
0 0.019907 0.023589 0.018443 0.025880 0.028166 0.004174
1 0.001264 0.002273 0.009403 0.017266 0.010118 0.044797
2 0.018225 0.015752 0.025495 0.029345 0.030818 0.032890
3 0.024736 0.026019 0.034711 0.039132 0.033645 0.080341
4 0.001597 0.001616 0.043929 0.031005 0.026100 0.050910
... ... ... ... ... ... ...
586 NaN NaN NaN 0.167913 0.164238 0.486559
587 NaN NaN NaN NaN 0.974276 0.390813
588 NaN NaN NaN NaN NaN 0.389211
589 NaN NaN NaN NaN NaN NaN
Pass/Fail NaN NaN NaN NaN NaN NaN
Pass/Fail
0 0.025141
1 0.002603
2 0.000957
3 0.024623
4 0.013756
... ...
586 0.004156
587 0.035391
588 0.031167
589 0.002653
Pass/Fail NaN
[443 rows x 443 columns]
# Flag every feature whose absolute correlation with an earlier column exceeds
# the threshold. The target is excluded explicitly: it is part of `upper`, and a
# feature strongly correlated with it must never cause the target to be dropped.
CORR_THRESHOLD = 0.70
to_drop = [
    column
    for column in upper.columns
    if column != 'Pass/Fail' and (upper[column] > CORR_THRESHOLD).any()
]
print(len(to_drop),to_drop)
241 ['17', '22', '26', '27', '30', '34', '35', '36', '39', '46', '50', '51', '54', '60', '65', '66', '70', '96', '98', '101', '104', '105', '106', '123', '124', '125', '127', '130', '140', '147', '148', '152', '154', '155', '163', '164', '165', '174', '185', '187', '196', '197', '198', '199', '202', '203', '204', '205', '206', '207', '209', '224', '248', '249', '252', '254', '270', '271', '272', '273', '274', '275', '277', '278', '279', '280', '281', '282', '283', '285', '286', '287', '288', '289', '290', '291', '294', '295', '296', '297', '298', '299', '300', '301', '302', '303', '304', '305', '306', '307', '308', '309', '310', '311', '312', '316', '317', '318', '319', '320', '321', '323', '324', '331', '332', '333', '334', '335', '336', '337', '338', '339', '340', '341', '342', '343', '344', '347', '348', '349', '350', '351', '352', '353', '354', '355', '356', '357', '359', '360', '361', '362', '363', '365', '366', '367', '368', '376', '377', '386', '387', '388', '389', '390', '391', '392', '393', '405', '406', '407', '408', '409', '410', '411', '412', '413', '415', '416', '417', '420', '421', '424', '425', '426', '427', '428', '429', '430', '431', '434', '435', '436', '437', '439', '440', '441', '442', '443', '444', '445', '446', '447', '448', '452', '453', '454', '455', '456', '457', '459', '467', '469', '470', '471', '473', '474', '475', '477', '478', '479', '480', '490', '491', '493', '494', '495', '496', '497', '520', '522', '523', '524', '525', '526', '527', '539', '540', '541', '545', '549', '551', '552', '553', '554', '555', '556', '557', '560', '561', '566', '567', '568', '569', '573', '574', '575', '576', '577', '584', '585', '588']
# Remove the flagged collinear features in place, then show the new shape.
data.drop(columns=to_drop, inplace=True)
data.shape
(1567, 202)
We checked for multi-collinearity in the dataset and removed the columns whose absolute correlation with an earlier column exceeds 0.70.
The number of columns is now reduced from 443 to 202.
Q2 E - Make all relevant modifications on the data using both functional/logical reasoning/assumptions.
# Q2 E: class proportions of the target before re-encoding.
data['Pass/Fail'].value_counts(normalize=True)
Pass/Fail -1 0.933631 1 0.066369 Name: proportion, dtype: float64
# Re-encode the target: -1 becomes 0, 1 stays 1; then re-check the class proportions.
target_encoding = {-1: 0, 1: 1}
data['Pass/Fail'] = data['Pass/Fail'].replace(target_encoding)
data['Pass/Fail'].value_counts(normalize=True)
Pass/Fail 0 0.933631 1 0.066369 Name: proportion, dtype: float64
We have modified the target column: it is now label encoded, with -1 replaced by 0 and 1 kept as 1.
Q3 A - Perform a detailed univariate Analysis with appropriate detailed comments after each analysis.
Performing univariate analysis for 200+ columns is difficult, so let's start with a descriptive analysis.
# Descriptive statistics, transposed so that each feature is one row.
descriptive_stats = data.describe().transpose()
print(descriptive_stats.head())
count mean std min 25% 50% \
0 1567.0 3014.452896 73.480613 2743.2400 2966.66500 3011.8400
1 1567.0 2495.850231 80.227793 2158.7500 2452.88500 2498.9100
2 1567.0 2200.547318 29.380932 2060.6600 2181.09995 2200.9556
3 1567.0 1396.376627 439.712852 0.0000 1083.88580 1287.3538
4 1567.0 4.197013 56.103066 0.6815 1.01770 1.3171
75% max
0 3056.5400 3356.3500
1 2538.7450 2846.4400
2 2218.0555 2315.2667
3 1590.1699 3715.0417
4 1.5296 1114.5366
def check_distribution(column):
    """Label the rough distribution shape of a pandas Series.

    Checks are applied in order and the first match wins:

    1. ``'Binomial'``     - the set of values is exactly {0, 1}.
    2. ``'Normal'``       - Shapiro-Wilk p-value is above 0.05.
    3. ``'Right Skewed'`` - sample skewness is positive.
    4. ``'Left Skewed'``  - sample skewness is negative.
    5. ``'Uniform'``      - fewer than 5% of the values are distinct.
    6. ``'Unknown'``      - none of the above.

    Steps 5 and 6 are only reached when the skewness is exactly 0 or NaN,
    because any other skewness value returns in step 3 or 4.

    Args:
        column: pandas Series of numeric values.

    Returns:
        One of the label strings listed above.
    """
    # Test for a binary indicator first: it needs no normality test, and running
    # Shapiro-Wilk beforehand can fail on very short 0/1 columns before the
    # 'Binomial' answer is ever reached.
    if set(column.unique()) == {0, 1}:
        return 'Binomial'

    _, p_value = shapiro(column)
    if p_value > 0.05:
        return 'Normal'

    skewness = column.skew()
    if skewness > 0:
        return 'Right Skewed'
    if skewness < 0:
        return 'Left Skewed'

    unique_ratio = column.nunique() / len(column)
    if unique_ratio < 0.05:
        return 'Uniform'
    return 'Unknown'
# Build a (Feature, Distribution) table by classifying every column of `data`,
# then list the distinct labels that occur.
distribution_rows = [
    [feature, check_distribution(data[feature])] for feature in data.columns
]
data_dist = pd.DataFrame(distribution_rows, columns=["Feature", "Distribution"])
data_dist["Distribution"].unique()
array(['Right Skewed', 'Left Skewed', 'Normal', 'Binomial'], dtype=object)
# Shape check before plotting.
data.shape
(1567, 202)
# Remaining column labels; 'Pass/Fail' is the last one.
data.columns
Index(['0', '1', '2', '3', '4', '6', '7', '8', '9', '10',
...
'565', '570', '571', '572', '582', '583', '586', '587', '589',
'Pass/Fail'],
dtype='object', length=202)
# Number of panels the plot grids below have to hold.
len(data.columns)
202
# Grid of KDE plots: one panel per column, five panels per grid row.
columns_per_row = 5
total_rows = -(-len(data.columns) // columns_per_row)  # ceiling division
fig, axes = plt.subplots(total_rows, columns_per_row, figsize=(15, total_rows*3))
axes = axes.flatten()

for panel, column in zip(axes, data.columns):
    sns.kdeplot(data[column], ax=panel, fill=True)
    panel.set_title(column)
    panel.set_xlabel('')
    panel.set_ylabel('Density')

# Delete the unused panels left over in the last grid row.
for spare_panel in axes[len(data.columns):]:
    fig.delaxes(spare_panel)

plt.tight_layout()
plt.show()
data.shape
(1567, 202)
Insights from KDE plot
Most of the features in the analysis are skewed (left or right); a few are normally or binomially distributed.
Features are grouped into four distribution types using the Shapiro-Wilk p-value, the set of values and the skewness:
1. Normal Distribution (p-value > 0.05)
2. Binomial Distribution (values 0/1)
3. Left Skewed Distribution (skew < 0)
4. Right Skewed Distribution (skew > 0)
# Number of columns per distribution label.
data_dist["Distribution"].value_counts()
Distribution Right Skewed 151 Left Skewed 49 Normal 1 Binomial 1 Name: count, dtype: int64
Overall Distribution Insight
Number of right-skewed features = 151
Number of left-skewed features = 49
Number of normally distributed features = 1
Number of binomially distributed features = 1
# Grid of horizontal boxplots (one panel per column) before outlier treatment.
columns_per_row = 5
total_rows = -(-len(data.columns) // columns_per_row)  # ceiling division
fig, axes = plt.subplots(total_rows, columns_per_row, figsize=(15, total_rows*3))
axes = axes.flatten()

for panel, column in zip(axes, data.columns):
    sns.boxplot(data[column], ax=panel, fill=True, orient="h")
    panel.set_title(column)
    panel.set_xlabel('')
    panel.set_ylabel('Boxplot')

# Delete the unused panels left over in the last grid row.
for spare_panel in axes[len(data.columns):]:
    fig.delaxes(spare_panel)

plt.tight_layout()
plt.show()
Insights from Boxplot: from the plots we can infer that most of the columns contain outliers, so we treat the outliers in all the feature columns.
# IQR-based outlier treatment for every column except the last one (the target):
# values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] are overwritten with the column median.
for feature in data.columns[:-1]:
    values = data[feature]
    lower_quartile = values.quantile(0.25)
    upper_quartile = values.quantile(0.75)
    spread = upper_quartile - lower_quartile
    lower_fence = lower_quartile - 1.5 * spread
    upper_fence = upper_quartile + 1.5 * spread
    # The median is taken before any value of the column is overwritten.
    replacement = values.median()
    outside_fences = values.lt(lower_fence) | values.gt(upper_fence)
    data.loc[outside_fences, feature] = replacement
data.shape
(1567, 202)
# Same boxplot grid again, now on the data after outlier treatment.
columns_per_row = 5
total_rows = -(-len(data.columns) // columns_per_row)  # ceiling division
fig, axes = plt.subplots(total_rows, columns_per_row, figsize=(15, total_rows*3))
axes = axes.flatten()

for panel, column in zip(axes, data.columns):
    sns.boxplot(data[column], ax=panel, fill=True, orient="h")
    panel.set_title(column)
    panel.set_xlabel('')
    panel.set_ylabel('Boxplot')

# Delete the unused panels left over in the last grid row.
for spare_panel in axes[len(data.columns):]:
    fig.delaxes(spare_panel)

plt.tight_layout()
plt.show()
Values beyond the 1.5 × IQR fences have now been replaced with the column median in every feature column.
Q3 B - Perform bivariate and multivariate analysis with appropriate detailed comments after each analysis.
Lets take subset of sample 10 columns and perform the analysis
# Shape is unchanged by the outlier treatment (values were replaced, not rows dropped).
data.shape
(1567, 202)
# Pick 10 columns at random (fixed seed) and show their pairwise correlations.
subset_columns = data.sample(n=10, axis="columns", random_state=42)
subset_columns.corr()
| 137 | 18 | 41 | 423 | 550 | 170 | 95 | 487 | 433 | 64 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 137 | 1.000000 | 0.019806 | 0.026938 | 0.061175 | 0.046226 | 0.009263 | 0.064987 | 0.020894 | -0.016252 | 0.004912 |
| 18 | 0.019806 | 1.000000 | 0.036318 | 0.005551 | -0.024781 | 0.012427 | -0.008170 | 0.028692 | -0.068229 | 0.006252 |
| 41 | 0.026938 | 0.036318 | 1.000000 | 0.009183 | 0.013748 | 0.045997 | 0.017352 | -0.058258 | -0.007330 | 0.040778 |
| 423 | 0.061175 | 0.005551 | 0.009183 | 1.000000 | 0.070703 | 0.070113 | 0.118962 | -0.062499 | 0.014841 | -0.023748 |
| 550 | 0.046226 | -0.024781 | 0.013748 | 0.070703 | 1.000000 | 0.008262 | -0.029039 | -0.040322 | -0.029160 | -0.031059 |
| 170 | 0.009263 | 0.012427 | 0.045997 | 0.070113 | 0.008262 | 1.000000 | 0.014416 | -0.100964 | -0.049580 | -0.046293 |
| 95 | 0.064987 | -0.008170 | 0.017352 | 0.118962 | -0.029039 | 0.014416 | 1.000000 | 0.031382 | -0.027062 | 0.043572 |
| 487 | 0.020894 | 0.028692 | -0.058258 | -0.062499 | -0.040322 | -0.100964 | 0.031382 | 1.000000 | 0.032866 | 0.013466 |
| 433 | -0.016252 | -0.068229 | -0.007330 | 0.014841 | -0.029160 | -0.049580 | -0.027062 | 0.032866 | 1.000000 | 0.034024 |
| 64 | 0.004912 | 0.006252 | 0.040778 | -0.023748 | -0.031059 | -0.046293 | 0.043572 | 0.013466 | 0.034024 | 1.000000 |
# Preview the sampled columns.
subset_columns.head()
| 137 | 18 | 41 | 423 | 550 | 170 | 95 | 487 | 433 | 64 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 75.2 | 192.3963 | 4.515 | 53.6840 | 12.930000 | 0.7250 | 0.0000 | 0.0000 | 49.0013 | 21.7264 |
| 1 | 81.0 | 191.2872 | 2.773 | 61.8918 | 16.000000 | 1.0498 | 0.0001 | 820.7900 | 199.7866 | 19.1927 |
| 2 | 96.5 | 192.7035 | 3.080 | 50.6425 | 16.160000 | 1.0824 | 0.0002 | 74.0741 | 109.5747 | 16.1755 |
| 3 | 123.7 | 192.1557 | 3.080 | 94.4594 | 17.013313 | 0.9386 | 0.0002 | 71.7583 | 181.2641 | 15.6209 |
| 4 | 123.1 | 191.6037 | 2.209 | 85.2255 | 19.630000 | 0.5760 | -0.0001 | 587.3773 | 0.0000 | 20.0445 |
# Append the target as an extra 'target' column, then draw the scatter-matrix.
# NOTE(review): if the random sample had picked 'Pass/Fail' itself, the target
# would appear twice - not the case for the sample shown above.
subset_columns["target"] = data["Pass/Fail"]
sns.pairplot(subset_columns)
<seaborn.axisgrid.PairGrid at 0x7f8c467cf100>
Insights from pairplot
We don't see any outliers (since we replaced them) or strong correlation between the independent variables (since we removed multi-collinearity); we do see different distributions of the data: normal, skewed and binomial.
# Annotated correlation heatmap of the sampled columns plus the target.
correlation_matrix = subset_columns.corr()
heatmap_axes = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
heatmap_axes.set_title('Correlation Heatmap of Subset of Columns')
plt.show()
Insights from heatmap
We don't see any high positive or negative correlation between the independent variables, since we treated multicollinearity earlier.
Q4 A - Segregate predictors vs target attributes.
# Q4 A: predictors are all columns but the last; the target is the last column
# ('Pass/Fail' is last in data.columns).
n_predictors = data.shape[1] - 1
X = data.iloc[:, :n_predictors]
Y = data.iloc[:, n_predictors]
X.head()
| 0 | 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | ... | 564 | 565 | 570 | 571 | 572 | 582 | 583 | 586 | 587 | 589 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3030.93 | 2564.00 | 2187.7333 | 1411.1265 | 1.3602 | 97.6133 | 0.1242 | 1.5005 | 0.0162 | -0.0034 | ... | 6.444985 | 0.14561 | 533.8500 | 2.1113 | 8.95 | 0.5005 | 0.0118 | 0.021458 | 0.016475 | 99.670066 |
| 1 | 3095.78 | 2465.14 | 2230.4222 | 1463.6606 | 0.8294 | 102.3433 | 0.1247 | 1.4966 | -0.0005 | -0.0148 | ... | 6.444985 | 0.14561 | 535.0164 | 2.4335 | 5.92 | 0.5019 | 0.0223 | 0.009600 | 0.020100 | 208.204500 |
| 2 | 2932.61 | 2559.94 | 2186.4111 | 1698.0172 | 1.5102 | 95.4878 | 0.1241 | 1.4436 | 0.0041 | 0.0013 | ... | 6.290000 | 0.14280 | 535.0245 | 2.0293 | 11.21 | 0.4958 | 0.0157 | 0.020500 | 0.014800 | 82.860200 |
| 3 | 2988.72 | 2479.90 | 2199.0333 | 909.7926 | 1.3204 | 104.2367 | 0.1217 | 1.4882 | -0.0124 | -0.0033 | ... | 7.320000 | 0.16300 | 530.5682 | 2.0253 | 9.33 | 0.4990 | 0.0103 | 0.020200 | 0.014900 | 73.843200 |
| 4 | 3032.24 | 2502.87 | 2233.3667 | 1326.5200 | 1.5334 | 100.3967 | 0.1235 | 1.5031 | -0.0031 | -0.0072 | ... | 6.444985 | 0.14561 | 532.0155 | 2.0275 | 8.83 | 0.5002 | 0.0138 | 0.020200 | 0.014900 | 73.843200 |
5 rows × 201 columns
# Distinct target labels.
Y.unique()
array([0, 1])
Q4 B - Check for target balancing and fix it if found imbalanced.
# Q4 B: share of each class in the target (class counts divided by row count).
data["Pass/Fail"].value_counts()/len(data["Pass/Fail"])
Pass/Fail 0 0.933631 1 0.066369 Name: count, dtype: float64
There is a class imbalance: class 0 has about 93.4% of the data and class 1 about 6.6%.
# Oversample the minority class with SMOTE until both classes are equally frequent.
# random_state is fixed so the synthetic rows - and every score computed from
# them further down - are reproducible between runs.
# NOTE(review): resampling happens before the train/test split below, so
# synthetic test rows may be interpolated from training rows; consider
# resampling the training split only.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, Y)
y_resampled.value_counts()/len(y_resampled)
Pass/Fail 0 0.5 1 0.5 Name: count, dtype: float64
Class imbalance has been fixed, each class has equal no of records (50% each)
Q4 C - Perform train-test split and standardize the data or vice versa if required.
# Q4 C: 80/20 train/test split of the resampled data with a fixed seed.
split_parts = train_test_split(
    X_resampled,
    y_resampled,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts
X_train.shape
(2340, 201)
# Size of the held-out test split.
X_test.shape
(586, 201)
# Standardise the features: the scaler is fitted on the training split only and
# then applied unchanged to the test split.
# NOTE(review): the model cells visible in this part of the notebook fit on
# X_train / X_test, not on these scaled arrays - confirm they are used later.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
Q4 D - Check if the train and test data have similar statistical characteristics when compared with original data.
# Q4 D: summary statistics of the original frame and of both splits.
data_stats, train_stats, test_stats = (
    frame.describe() for frame in (data, X_train, X_test)
)
print("5 points summary of Orginal Dataset")
data_stats.transpose()
5 points summary of Orginal Dataset
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1567.0 | 3010.002449 | 61.721903 | 2831.9100 | 2968.1950 | 3011.8400 | 3051.8400 | 3190.9700 |
| 1 | 1567.0 | 2496.985253 | 59.577861 | 2326.5900 | 2459.8700 | 2498.9100 | 2534.3250 | 2666.0400 |
| 2 | 1567.0 | 2201.001188 | 25.681452 | 2126.6555 | 2183.0556 | 2200.9556 | 2217.5778 | 2270.2556 |
| 3 | 1567.0 | 1345.707557 | 334.300811 | 711.0258 | 1084.3779 | 1287.3538 | 1551.6947 | 2347.9092 |
| 4 | 1567.0 | 1.296507 | 0.333287 | 0.6815 | 1.0177 | 1.3171 | 1.4905 | 2.2449 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 583 | 1567.0 | 0.013891 | 0.003183 | 0.0060 | 0.0116 | 0.0138 | 0.0158 | 0.0237 |
| 586 | 1567.0 | 0.020574 | 0.010188 | -0.0060 | 0.0135 | 0.0205 | 0.0274 | 0.0484 |
| 587 | 1567.0 | 0.015366 | 0.006558 | 0.0032 | 0.0106 | 0.0148 | 0.0190 | 0.0345 |
| 589 | 1567.0 | 76.511991 | 45.382032 | 0.0000 | 44.3686 | 72.0230 | 94.4159 | 220.0378 |
| Pass/Fail | 1567.0 | 0.066369 | 0.249005 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 1.0000 |
202 rows × 8 columns
# One row per feature for the training split.
print("5 points summary of Train Dataset")
train_stats.transpose()
5 points summary of Train Dataset
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2340.0 | 3004.256469 | 59.878665 | 2831.9100 | 2962.943377 | 3001.515000 | 3043.724024 | 3190.9700 |
| 1 | 2340.0 | 2496.994359 | 55.266846 | 2326.5900 | 2462.289829 | 2498.910000 | 2531.182500 | 2664.5200 |
| 2 | 2340.0 | 2199.411263 | 23.088816 | 2126.6555 | 2182.027369 | 2199.276227 | 2214.859155 | 2270.2556 |
| 3 | 2340.0 | 1331.779702 | 297.220185 | 711.0258 | 1098.632416 | 1279.477538 | 1517.650450 | 2347.9092 |
| 4 | 2340.0 | 1.282883 | 0.297298 | 0.6815 | 1.048171 | 1.310100 | 1.461557 | 2.2449 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 582 | 2340.0 | 0.500326 | 0.003155 | 0.4915 | 0.498266 | 0.500367 | 0.502416 | 0.5090 |
| 583 | 2340.0 | 0.013886 | 0.002892 | 0.0060 | 0.011718 | 0.013615 | 0.015700 | 0.0237 |
| 586 | 2340.0 | 0.021079 | 0.009524 | -0.0060 | 0.014645 | 0.020987 | 0.027500 | 0.0484 |
| 587 | 2340.0 | 0.015953 | 0.006004 | 0.0032 | 0.011316 | 0.015141 | 0.020089 | 0.0345 |
| 589 | 2340.0 | 77.765677 | 43.191719 | 0.0000 | 48.246057 | 71.850864 | 96.753113 | 220.0378 |
201 rows × 8 columns
# One row per feature for the test split.
print("5 points summary of Test Dataset")
test_stats.transpose()
5 points summary of Test Dataset
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| 0 | 586.0 | 2997.136603 | 58.070305 | 2846.070000 | 2958.076926 | 2995.745000 | 3031.835000 | 3190.7800 |
| 1 | 586.0 | 2496.375103 | 55.154780 | 2332.390000 | 2465.442500 | 2499.004939 | 2529.951022 | 2666.0400 |
| 2 | 586.0 | 2197.888701 | 22.180319 | 2134.744500 | 2181.308935 | 2197.715888 | 2213.152317 | 2256.6000 |
| 3 | 586.0 | 1343.587745 | 280.225169 | 867.302700 | 1117.189770 | 1288.668406 | 1520.972784 | 2341.7833 |
| 4 | 586.0 | 1.291169 | 0.298840 | 0.728748 | 1.063290 | 1.304509 | 1.484075 | 2.2328 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 582 | 586.0 | 0.500434 | 0.003128 | 0.491500 | 0.498301 | 0.500500 | 0.502400 | 0.5088 |
| 583 | 586.0 | 0.013757 | 0.002806 | 0.007100 | 0.011565 | 0.013683 | 0.015584 | 0.0236 |
| 586 | 586.0 | 0.020937 | 0.009581 | -0.006000 | 0.014531 | 0.020900 | 0.026765 | 0.0484 |
| 587 | 586.0 | 0.016083 | 0.006067 | 0.004500 | 0.011600 | 0.015517 | 0.019957 | 0.0339 |
| 589 | 586.0 | 78.830305 | 43.848858 | 0.000000 | 49.594581 | 71.653961 | 96.755252 | 220.0378 |
201 rows × 8 columns
# Side-by-side summary statistics of one sample feature (positional row 1 of the
# transposed describe() frames, i.e. feature '1') for the original data and both splits.
# `keys` labels the three columns; without it all of them are named '1'.
SAMPLE_ROW = 1
summary_comparison = pd.concat(
    [
        data_stats.T.iloc[SAMPLE_ROW, :],
        train_stats.T.iloc[SAMPLE_ROW, :],
        test_stats.T.iloc[SAMPLE_ROW, :],
    ],
    axis=1,
    keys=["Original", "Train", "Test"],
)
print("Comparision of 5 points summary of column 1 (sample coln) to check the statistical characteristics")
summary_comparison
Comparision of 5 points summary of column 1 (sample coln) to check the statistical characteristics
| 1 | 1 | 1 | |
|---|---|---|---|
| count | 1567.000000 | 2340.000000 | 586.000000 |
| mean | 2496.985253 | 2496.994359 | 2496.375103 |
| std | 59.577861 | 55.266846 | 55.154780 |
| min | 2326.590000 | 2326.590000 | 2332.390000 |
| 25% | 2459.870000 | 2462.289829 | 2465.442500 |
| 50% | 2498.910000 | 2498.910000 | 2499.004939 |
| 75% | 2534.325000 | 2531.182500 | 2529.951022 |
| max | 2666.040000 | 2664.520000 | 2666.040000 |
The original, train and test datasets have similar statistical characteristics.
Q5 A - Use any Supervised Learning technique to train a model.
# Q5 A: baseline decision tree (Gini impurity, fixed seed) fitted on the
# unscaled training split.
dTree = DecisionTreeClassifier(criterion = 'gini', random_state=1)
dTree.fit(X_train, y_train)
DecisionTreeClassifier(random_state=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(random_state=1)
# Mean accuracy of the baseline tree on the training and the test split.
train_accuracy = dTree.score(X_train, y_train)
print("Train Score", train_accuracy)
test_accuracy = dTree.score(X_test, y_test)
print("Test Score", test_accuracy)
Train Score 1.0 Test Score 0.8822525597269625
The simple model has a train score of 1.0 and a test score of 0.88.
It appears to be overfitted, since it performs perfectly on the training set and noticeably worse on the test set.
# Confusion matrix of the baseline tree on the test split, drawn as a heatmap
# (rows: actual class, columns: predicted class).
print("Confusion Matrix- Simple Dtree")
y_predict = dTree.predict(X_test)
cm = metrics.confusion_matrix(y_test, y_predict, labels=[0, 1])
actual_labels = ["No", "Yes"]
predicted_labels = ["Predicted No", "Predicted Yes"]
df_cm = pd.DataFrame(cm, index=actual_labels, columns=predicted_labels)
plt.figure(figsize=(7, 5))
sns.heatmap(df_cm, annot=True, fmt='g')
Confusion Matrix- Simple Dtree
<Axes: >
# Per-class precision / recall / F1 of the baseline tree on the test split,
# shown as a list of report lines.
predicted_labels_test = dTree.predict(X_test)
print("Test Performance Matrix: (Simple Dtree Model):")
report_text = metrics.classification_report(y_test, predicted_labels_test)
report_text.split("\n")
Test Performance Matrix: (Simple Dtree Model):
[' precision recall f1-score support', '', ' 0 0.92 0.85 0.88 306', ' 1 0.85 0.92 0.88 280', '', ' accuracy 0.88 586', ' macro avg 0.88 0.88 0.88 586', 'weighted avg 0.89 0.88 0.88 586', '']
Simple Model Observation
Class False:
Precision is 0.92
Recall is 0.85
F1-Score is 0.88
Class True:
Precision is 0.85
Recall is 0.92
F1-Score is 0.88
Q5 B - Use cross validation techniques.
# Q5 B: 10-fold cross-validation of an untuned decision tree on the resampled data.
# The estimator is seeded (same seed as the baseline `dTree`) so the fold scores
# are reproducible; an unseeded tree can break ties between equally good splits
# differently on every run.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=1), X_resampled, y_resampled, cv=kf)
print(cv_scores)
print("\nKfold Cross Validation")
print("Accuracy : %.1f%% (%.1f%%)"%(cv_scores.mean()*100,cv_scores.std()*100))
[0.88395904 0.8668942 0.91467577 0.90443686 0.89761092 0.89419795 0.91438356 0.88013699 0.90753425 0.89383562] Kfold Cross Validation Accuracy : 89.6% (1.5%)
# Stratified 10-fold cross-validation: every fold keeps the class proportions
# of y_resampled. The estimator is seeded (same seed as the baseline `dTree`)
# so the fold scores are reproducible between runs.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
cv_scores = cross_val_score(DecisionTreeClassifier(random_state=1), X_resampled, y_resampled, cv=skf)
print(cv_scores)
print("\nStratified Kfold Cross Validation")
print("Accuracy : %.1f%% (%.1f%%)"%(cv_scores.mean()*100,cv_scores.std()*100))
[0.87372014 0.89761092 0.90443686 0.87372014 0.92832765 0.92832765 0.90068493 0.91780822 0.90753425 0.90068493] Stratified Kfold Cross Validation Accuracy : 90.3% (1.8%)
Have tried 10 fold validation
K-fold validation - score - received accuracy is 89.6% (with 2.6% as standard decviartion from mean)
Stratified K-fold validation - score - received accuracy is 90.3% (with 1.8% as standard decviartion from mean)
Leave one out Cross validation - score - is not good idea to try because it will compute near to 1.5k iterations, this is not good approach and it is mainly use for small dataset
Conclusion
In the end, stratified K-fold validation gives the best accuracy, 90.3%, compared to K-fold (89.6%)
Q5 C - Apply hyper-parameter tuning techniques to get the best accuracy.
# Exhaustive search over decision-tree hyper-parameters, scored by 5-fold CV accuracy.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# Seed the estimator: with splitter='random' in the grid (and random tie-breaking even for
# splitter='best'), an unseeded tree makes the winning combination change from run to run.
grid_search = GridSearchCV(DecisionTreeClassifier(random_state=1), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
param_grid={'criterion': ['gini', 'entropy'],
'max_depth': [None, 10, 20, 30],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'splitter': ['best', 'random']},
scoring='accuracy', verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
param_grid={'criterion': ['gini', 'entropy'],
'max_depth': [None, 10, 20, 30],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'splitter': ['best', 'random']},
scoring='accuracy', verbose=1)DecisionTreeClassifier()
DecisionTreeClassifier()
# Best hyper-parameter combination found by the grid search.
# Despite the name, this is the parameter dict, not a fitted model.
dTreeP_model = grid_search.best_params_
dTreeP_model
{'criterion': 'entropy',
'max_depth': 20,
'min_samples_leaf': 2,
'min_samples_split': 10,
'splitter': 'best'}
# Build the tuned tree directly from the search result instead of copying the values by
# hand: if the search is re-run and picks a different combination, hand-typed parameters
# would silently go stale. random_state is fixed so the fitted tree is reproducible.
dTreehp = DecisionTreeClassifier(**grid_search.best_params_, random_state=1)
dTreehp.fit(X_train, y_train)
DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2,
min_samples_split=10, random_state=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(criterion='entropy', max_depth=20, min_samples_leaf=2,
min_samples_split=10, random_state=1)
print("Hyper Parametertuning - Train Score",dTreehp.score(X_train, y_train))
# Accuracy of the tuned tree on the test split, to compare against its train score.
print("Hyper Paramtertuning - Test Score",dTreehp.score(X_test, y_test))
Hyper Parametertuning - Train Score 0.9846153846153847 Hyper Paramtertuning - Test Score 0.8856655290102389
Insights from hyperparameter tuning: the scores improved compared with the untuned model
Train : 98.4 (reduced from 100) - overfitting is reduced
Test : 88.5 (improved from 88.2) - the test score improved and the gap to the train score narrowed
Q5 D - Use any other technique/method which can enhance the model performance.
# Exploratory PCA: fit the first 10 principal components of the resampled feature matrix
# so their explained variance can be inspected in the following cells.
# NOTE(review): the explained variances printed for this fit are in the millions, which
# suggests the features are not standardised and a few large-scale columns dominate the
# components - confirm whether scaling before PCA was intended.
pca = PCA(n_components=10).fit(X_resampled)
pca
PCA(n_components=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA(n_components=10)
# Absolute variance captured along each of the 10 fitted components.
print(pca.explained_variance_)
[13845042.29453119 4541802.83785673 2768144.42643618 539312.54347369
142388.81973074 112639.00814615 109456.9333967 99141.69763772
95441.77895265 85710.87218695]
# Principal axes: one row of per-feature loadings for each component.
print(pca.components_)
[[ 9.90329021e-04 1.03248746e-03 -7.29179980e-04 ... 2.56145741e-07 -6.48616005e-08 -9.43401943e-04] [-1.03852355e-03 9.62204927e-04 2.19054829e-04 ... -3.55785954e-07 1.26937856e-07 2.17596664e-03] [-5.53705667e-04 -2.67157450e-03 2.06643834e-04 ... 1.01734591e-08 -1.14168622e-07 -1.26488786e-03] ... [ 1.74043629e-03 3.46837520e-03 7.70921279e-03 ... -1.65746007e-06 -5.17496363e-07 -1.08583484e-02] [ 1.03642149e-02 -1.76771800e-03 1.17899649e-02 ... -1.80648225e-06 -9.60361855e-07 -5.86849982e-03] [-3.23261415e-03 -1.62442877e-02 1.87092541e-02 ... -1.52065796e-06 -1.87997688e-07 2.01751417e-03]]
# Fraction of the total variance captured by each component.
print(pca.explained_variance_ratio_)
[0.6009062 0.19712453 0.12014374 0.02340739 0.00618 0.00488879 0.00475068 0.00430297 0.00414239 0.00372005]
# Scree plot and cumulative explained-variance plot for the exploratory PCA.
# The x positions are derived from the fitted PCA rather than hard-coded to 1..10, so the
# plots stay valid if n_components is changed in the cell that builds `pca`.
pca_var_ratio = pca.explained_variance_ratio_
pca_component_ids = np.arange(1, len(pca_var_ratio) + 1)

# Share of variance explained by each individual component.
plt.bar(pca_component_ids, pca_var_ratio, alpha=0.5, align='center')
plt.ylabel('Variation explained')
plt.xlabel('# of PCA Components')
plt.show()

# Running total of explained variance, used to choose how many components to keep.
plt.step(pca_component_ids, np.cumsum(pca_var_ratio), where='mid')
plt.ylabel('Cum of variation explained')
plt.xlabel('# of PCA Components')
plt.show()
With 4 PCA components, about 94% of the variation is explained (more than 90% and close to 95%), so we can select 4 components
# Reduce the resampled features to the 4 leading principal components.
pca4 = PCA(n_components=4)
# Fit on the full resampled matrix, then project that same matrix onto the 4 components.
Xpca4 = pca4.fit(X_resampled).transform(X_resampled)
# Per-feature loadings of each component, followed by the share of variance each one explains.
print(pca4.components_)
print(pca4.explained_variance_ratio_)
[[ 7.64336501e-04 1.17203029e-03 -5.80489754e-04 -2.56582187e-03 1.70540860e-06 -2.59871166e-05 4.16197498e-08 7.88689037e-07 -2.64082580e-07 1.98219313e-07 2.65982355e-07 7.61884767e-06 -7.07161304e-05 -3.19050023e-04 -7.28799973e-06 2.14174132e-05 4.17927504e-07 1.29765155e-07 3.52747936e-02 -7.97061180e-03 -1.32297757e-01 1.25600773e-05 1.16181302e-04 -2.98135451e-05 -1.38543770e-06 4.85565809e-06 -1.36905369e-06 -3.14957736e-06 -1.28489215e-07 8.74198301e-05 1.60278307e-06 -1.97788690e-05 4.51498881e-06 -9.11301961e-05 5.11908054e-06 1.08333537e-04 5.11941460e-08 -4.45144436e-04 -1.39479834e-07 -1.39390239e-08 -1.46594688e-06 -9.41416597e-06 3.45157716e-06 -1.25092742e-04 2.58189120e-05 -5.84043790e-05 -1.69101062e-06 4.27960995e-05 -4.20506148e-06 -0.00000000e+00 1.59863235e-07 2.22153157e-07 -3.30400656e-07 1.82213354e-06 -1.62346856e-07 2.04830417e-06 2.47799403e-07 -6.37200375e-08 4.48054658e-06 -4.50693216e-09 7.85517297e-07 1.84034291e-07 -2.29895313e-05 3.81112223e-07 6.91569352e-03 7.74962604e-07 2.15139235e-08 -1.56013495e-08 -1.44192952e-09 1.32779085e-09 -4.99811976e-07 3.92117160e-09 -6.33038572e-07 -8.37358784e-08 -8.61380606e-07 -3.28687036e-07 6.66729260e-08 -0.00000000e+00 2.41360469e-04 -1.15554934e-08 -2.39207415e-05 9.74486634e-08 -6.95266936e-08 -1.56719260e-06 4.54073728e-07 1.30838357e-05 -1.48735009e-06 -2.85134289e-06 4.02186792e-06 -1.41291088e-08 -1.98919614e-06 2.00099355e-05 2.04115974e-05 3.58576366e-04 -1.06046444e-03 -1.08354471e-03 -8.80796868e-05 5.41393261e-03 2.84015667e-05 4.25331821e-09 -1.16308434e-07 2.59456367e-07 -6.35878536e-07 -4.68341113e-05 1.85420254e-05 -1.10752425e-07 2.26293224e-07 1.07797710e-02 -1.04125442e-02 -4.48335462e-02 9.89249015e-01 -1.33375635e-06 2.65677999e-05 2.72216023e-06 1.59859118e-05 -1.96364985e-06 1.35787070e-07 4.85116008e-07 1.50887578e-06 -2.26016926e-06 8.60267087e-07 3.06482824e-06 9.78887418e-05 -1.14750508e-06 2.48813794e-05 3.01701396e-05 1.17747210e-06 6.76671280e-06 5.07152398e-08 
6.96373488e-05 6.22478138e-05 -8.31895155e-04 6.80375934e-08 -3.78011396e-09 -7.14910515e-07 -5.54500269e-07 -8.10749381e-08 -3.96054082e-07 2.63253638e-07 4.52775312e-07 -1.68005995e-05 3.16548809e-09 -6.70202487e-07 -2.08514070e-08 4.17512234e-04 -1.37180136e-03 -9.32683585e-08 -9.97578782e-08 -1.56830534e-08 2.66443985e-08 1.59647771e-04 1.39036949e-09 1.09275595e-07 -4.32428852e-06 3.95327157e-07 7.01273933e-05 1.72491265e-06 -6.08099205e-03 -1.46699598e-03 -1.64651718e-04 -1.56733179e-03 7.66519735e-03 1.77694940e-03 -8.66587452e-05 4.17347429e-03 3.04529485e-04 -7.27170363e-05 -4.60122622e-03 3.32439140e-05 -4.88075412e-05 4.98949320e-03 3.21592772e-03 4.71815284e-03 -5.40347107e-03 1.93260867e-04 -3.50344138e-03 -2.67262978e-03 -2.71635437e-04 3.77173221e-04 -0.00000000e+00 1.19633058e-07 3.58103609e-08 -3.65867069e-09 1.79786428e-07 1.56209176e-05 -1.03950688e-05 -1.34841433e-05 1.51926547e-07 -2.09390795e-06 2.36761865e-05 -1.44182988e-06 -3.26856148e-05 1.92148811e-07 -5.33524240e-05 -1.51452799e-06 2.08768557e-05 4.26052817e-08 -3.15399703e-08 3.02149897e-07 -5.93335698e-08 -9.46459812e-04] [-8.26633359e-04 9.33610420e-04 2.02711118e-04 6.22533146e-03 -8.66946064e-06 1.63689236e-04 1.45460064e-08 -6.03122400e-07 1.43293821e-07 6.89344387e-08 -3.52630667e-07 -2.23128596e-05 -1.11831423e-04 4.00355805e-04 1.09759549e-05 -2.71895058e-05 -4.84296571e-07 -1.75814746e-07 -1.48133843e-02 -3.55043301e-02 6.24565419e-02 -3.67565937e-06 -1.11073568e-04 1.17650565e-05 -1.00429665e-06 2.32786380e-06 1.63734799e-05 1.58977560e-05 4.26903349e-06 -1.88931740e-04 1.02089382e-05 -8.52196234e-05 2.17282572e-07 -3.76992628e-05 -3.19507418e-06 -6.03690577e-05 1.11587969e-07 -4.08420264e-04 1.49899776e-07 1.02019874e-08 7.48486536e-07 9.71113352e-05 -2.03117177e-06 1.50834147e-04 -7.61347045e-05 -1.16632028e-06 -1.33638081e-06 -1.02168784e-04 6.20762155e-04 -0.00000000e+00 -2.20085357e-07 -1.64520723e-06 -1.04438386e-06 -1.95344180e-06 -6.95847501e-09 9.88147691e-08 
4.34254071e-07 2.04145626e-07 -2.04403378e-06 5.82458497e-08 1.68901094e-07 -5.06406326e-07 -4.43693680e-04 -7.09460698e-07 -6.93313592e-03 2.44287790e-06 1.58241947e-08 4.18196300e-09 -1.51225040e-09 8.81079141e-10 -2.03117288e-07 3.35930468e-09 -1.09684822e-06 9.16354137e-08 1.73731373e-06 5.60571506e-08 3.03813150e-07 -0.00000000e+00 -6.50522719e-04 1.10931233e-08 -9.79358498e-07 3.82553794e-08 -9.28667931e-09 -6.25963238e-07 8.99074537e-07 2.25319660e-06 -2.42882018e-06 -3.75210614e-06 -9.39336713e-07 -3.50814052e-08 -1.15683369e-07 2.43603047e-04 -1.63788041e-06 -7.71055219e-04 -2.28960877e-04 -4.65344001e-04 7.38156170e-05 1.55659613e-04 2.77062400e-06 3.90651109e-09 -1.31663126e-07 -4.25125584e-07 -4.55496732e-07 -1.43575980e-04 1.30551349e-04 7.62498598e-08 -8.52507481e-07 1.01343704e-02 7.40810509e-03 9.95396532e-01 5.37692161e-02 4.41763421e-05 -2.97437134e-07 -7.70265605e-07 -4.26986499e-06 -3.17427957e-06 -4.95046787e-07 -2.19651703e-07 -1.54698304e-06 -3.29659411e-06 -3.94779390e-06 4.17418485e-07 -1.08614479e-04 -4.06301616e-06 -2.81983391e-05 1.32546711e-04 1.10487722e-06 2.57443813e-04 8.35092464e-07 5.67446216e-05 -8.02676826e-05 -7.56052190e-04 1.35963457e-06 4.52460265e-07 -1.77966247e-07 -4.87327620e-08 7.01813285e-07 -6.48910082e-07 8.63538909e-07 1.27402312e-07 6.61979255e-06 3.05298327e-08 -1.09341612e-07 5.77605700e-08 7.13021835e-05 5.72577524e-03 6.74797379e-08 3.15981059e-08 1.37543496e-08 2.10546437e-08 4.33837258e-04 -7.34968946e-10 -4.82136587e-08 -1.21999713e-06 -1.02500007e-06 2.68290180e-05 -2.64461010e-05 9.54693380e-04 9.90488306e-03 -9.87673357e-04 1.48230446e-02 -1.64674649e-03 -1.19768492e-04 -6.11102766e-06 -3.20894434e-03 1.52657273e-03 -2.11270961e-04 -1.24418921e-03 -7.73098341e-04 -1.46375199e-03 -1.60712145e-03 -7.36517025e-03 2.87889475e-04 -6.57042016e-03 -1.58575171e-03 -5.33627996e-03 -4.96669803e-03 2.73588023e-04 1.15131634e-02 -0.00000000e+00 6.60752077e-08 -1.95787174e-08 3.89570168e-10 3.53494133e-06 
3.98771626e-05 -5.64391977e-05 -1.67659807e-05 -1.85855515e-07 -5.97231139e-07 4.89648990e-05 -4.53369123e-08 6.24548899e-05 -5.36833481e-07 -1.61274101e-05 -6.56815045e-06 1.20638120e-04 1.03722810e-07 -4.26326895e-08 -2.80396734e-07 1.63925846e-07 1.96651218e-03] [ 1.54332195e-05 -2.59172731e-03 1.22211492e-04 -6.49411442e-03 -1.56146334e-05 1.14425484e-05 -9.06040868e-09 -9.48230363e-07 -3.57230685e-07 1.02161218e-07 -4.62604806e-07 7.17691842e-05 8.80137541e-05 -4.85096025e-05 4.02809644e-06 6.87865937e-05 -3.23126087e-07 4.45300538e-08 -8.08240895e-02 -1.40127042e-02 9.84363695e-01 -2.54861651e-05 -6.57802641e-05 8.77115092e-06 5.37284525e-06 1.91298935e-05 2.23782826e-05 1.69962140e-06 1.68086141e-05 -3.07091989e-05 8.95934207e-06 2.44920673e-04 -1.93072561e-06 -1.85669296e-04 4.87779508e-06 5.54760635e-05 -3.59060832e-07 6.87474955e-04 -5.11987267e-08 1.27572185e-07 2.77134705e-06 -6.67348743e-06 1.20812434e-05 -4.20665495e-04 2.61529682e-04 5.72863435e-05 3.07293314e-06 2.40836569e-05 -1.89226916e-04 -0.00000000e+00 1.36560531e-07 -9.97261644e-07 2.42135683e-07 -2.55740921e-06 2.70959326e-08 -2.60397621e-07 1.01993192e-07 -6.91253901e-07 1.00259823e-05 -6.02116574e-08 1.40796085e-07 7.40269475e-08 8.10111469e-04 -2.76801798e-08 -5.53343634e-03 -1.10055952e-06 4.57446533e-08 4.45549577e-08 5.14484981e-09 1.60481710e-09 -1.65072458e-06 7.91162825e-10 -1.39015895e-06 5.79117565e-08 -1.47162026e-06 1.17057741e-06 -1.13950892e-07 -0.00000000e+00 1.54281477e-03 -1.87905852e-09 -1.27277098e-05 -8.46954442e-08 -3.93154722e-08 -2.55245673e-06 4.30862190e-06 2.78975392e-05 -8.93922399e-06 -6.10214816e-06 3.45428578e-05 -5.35653573e-08 -8.91432768e-07 2.13584525e-04 -1.05279813e-04 6.48139631e-04 9.48138840e-04 1.78661048e-03 -3.40067943e-05 -7.89978153e-03 -2.77448943e-05 2.51538060e-09 -7.23496790e-07 4.79548120e-07 8.86921096e-08 4.93644087e-05 -1.15422293e-04 1.38473349e-07 -7.70184126e-07 -1.68801272e-02 9.43207717e-03 -7.03537120e-02 1.31616889e-01 
3.44533344e-05 -5.01622803e-05 -5.62167106e-06 -2.39507970e-05 -3.29066972e-06 1.45343839e-06 1.59090355e-06 2.04568530e-06 -2.80839840e-06 8.85441526e-07 -5.35335431e-06 -3.94173379e-05 -2.17665405e-06 -1.26007264e-04 -1.58003119e-04 1.68928935e-06 5.57291920e-04 8.54547713e-08 6.26228715e-05 -1.10293543e-06 2.12275333e-04 1.17750601e-06 8.52767826e-08 4.07670387e-07 7.95392482e-07 5.04411151e-07 6.19609531e-08 2.24164823e-07 2.16349895e-07 -3.05031090e-05 -1.02440409e-08 -2.70527268e-07 8.91984859e-09 8.94834176e-04 -1.04059511e-02 2.06822991e-08 -1.36432093e-07 4.55471201e-08 4.77323222e-09 -1.76135369e-03 -6.54129033e-10 -2.33855986e-08 6.24578643e-06 -6.88719720e-07 -9.86477510e-05 4.20521234e-05 -9.84101480e-03 -3.10518970e-04 -8.16789329e-05 -1.78769472e-03 -2.10832222e-02 -1.85792115e-03 4.25788917e-04 5.53615780e-03 -2.46299482e-03 -3.71317657e-04 1.30532090e-02 -3.20595999e-03 9.54156000e-03 7.35062893e-03 -8.08056274e-04 -2.37648679e-03 -1.66148008e-02 3.62774625e-03 -2.74476447e-03 1.25998104e-02 6.94436040e-04 -3.93591715e-03 -0.00000000e+00 -5.34556074e-08 -3.93462752e-08 4.07525798e-09 4.11318231e-06 -1.77564157e-04 1.06624862e-04 3.44409287e-05 -5.65205066e-07 4.19549192e-06 8.43884967e-05 2.05252170e-06 -4.20610001e-05 9.70132806e-07 -1.34329013e-06 -3.25685270e-06 -2.62610382e-05 -1.87383545e-07 -7.51637557e-09 -2.51462851e-08 -1.13497271e-07 -9.64295713e-04] [ 1.09165560e-04 2.33811796e-03 1.90084421e-03 1.67212072e-03 3.53317119e-05 1.52269614e-04 -7.53161517e-08 1.61979615e-07 -1.14947866e-06 -4.54959083e-07 -1.76367160e-07 5.58093424e-05 -1.07464106e-04 -4.17965784e-04 -4.50231316e-05 8.13081157e-05 8.87283424e-07 3.97222204e-08 4.30146848e-02 9.94919103e-01 1.92267117e-02 9.37532244e-06 -4.39403265e-04 -2.39771982e-05 5.63146295e-06 8.35408204e-05 3.38689603e-05 4.44186630e-05 -1.74227903e-05 -8.54630275e-06 -5.77986542e-05 2.11880157e-04 1.40828503e-05 2.39578744e-04 -1.97473082e-05 3.60605466e-05 8.21008893e-07 2.48817849e-03 
-4.11822824e-07 2.96668792e-07 -1.74964310e-06 -7.19768818e-05 4.58366304e-06 -7.20899086e-05 -4.68688740e-04 -1.61735823e-04 -5.19060741e-06 4.06674704e-04 1.28962069e-04 -0.00000000e+00 3.44497962e-08 -2.08148250e-06 8.39450532e-07 1.15341413e-06 -7.68760205e-07 9.22325561e-07 6.67913243e-07 2.30970969e-06 2.64919143e-05 1.24316135e-07 1.16540916e-06 3.53794448e-07 -9.15154180e-04 -1.36932119e-06 1.81017075e-03 1.51291540e-06 -1.75974219e-07 9.83567961e-08 1.54068176e-08 1.06073031e-09 1.33580432e-08 4.03704696e-09 6.46122919e-06 -1.81728478e-07 1.09592803e-06 3.36860427e-07 1.08094193e-07 -0.00000000e+00 -1.72804594e-04 -3.05394416e-08 -2.66156377e-05 2.89473437e-07 -1.24488496e-06 1.16706759e-05 -1.43731478e-05 -5.98912712e-05 1.27717258e-05 1.01955584e-05 -5.03002070e-06 -1.63126618e-08 2.53465423e-06 -1.05536747e-03 -4.89936866e-05 -1.74982567e-03 2.93849152e-03 -1.84699276e-03 1.65838703e-03 1.71883508e-02 7.54342108e-05 6.20236407e-08 -6.06469263e-07 -9.10767293e-07 1.22990332e-07 4.08571895e-05 3.35487078e-05 3.10990207e-07 -3.36133937e-07 -1.26298012e-02 -7.55945365e-03 3.44842587e-02 1.03809465e-02 9.33415824e-05 -4.05889802e-06 -2.49597221e-06 -1.60112585e-05 -2.55046233e-05 1.27121065e-08 1.97048727e-06 3.26704164e-06 -2.04578532e-06 -1.67909086e-05 5.65127850e-06 -1.51426022e-04 -7.98220363e-06 1.94764384e-04 -7.12216390e-04 -6.38577151e-06 -1.86286186e-03 1.21381547e-06 -4.27237015e-04 1.35102148e-04 3.80881267e-04 7.29433983e-08 1.48861148e-06 2.57126378e-06 2.35574281e-06 1.71621323e-06 -1.03113647e-07 -2.22226027e-06 -7.89950179e-07 -3.05362849e-05 -1.95607002e-08 -1.78783957e-06 -1.79068883e-08 -3.62936800e-03 1.28678907e-02 -8.33687039e-08 8.41277370e-10 -2.53921824e-08 -8.17935426e-08 1.48756671e-03 2.87367368e-08 4.23097892e-07 -2.09496575e-05 1.21207556e-06 1.28592166e-04 8.27919948e-06 2.49142043e-02 -4.78303714e-02 3.85390674e-04 1.49767593e-02 7.70506889e-03 1.74994345e-04 -6.34475212e-04 1.28442928e-03 3.23875667e-04 1.49898410e-03 
-1.74575497e-02 4.55747810e-03 1.42067666e-03 -2.17210437e-03 1.50281614e-02 2.07856554e-02 -1.56426075e-02 1.25966174e-02 2.23993964e-03 2.02532004e-02 -1.49999407e-03 2.93990747e-02 -0.00000000e+00 1.20232366e-07 6.13476181e-08 -2.31057638e-09 -1.52582924e-05 1.50498058e-04 -2.65084411e-04 2.99961864e-04 -3.51119470e-07 9.77996970e-06 4.73783405e-05 2.17273026e-06 -3.49842703e-05 1.69888313e-06 -6.96002787e-05 1.09836456e-05 1.98647585e-04 -1.65025652e-07 -2.71493398e-07 2.01806742e-07 1.43136046e-07 2.68698951e-03]] [0.60194532 0.19629934 0.12039764 0.02308566]
# Display the projected data: one row per sample, one column per principal component.
Xpca4
array([[-2702.43927159, -2053.86145796, 750.79941754, -209.41424788],
[ 531.67099904, -2832.15824603, -1183.42548716, 284.27480282],
[ -618.68608676, -2443.58831873, -1617.64958583, -266.93710952],
...,
[-1628.52791777, -125.79916728, 256.94216287, -1235.76293333],
[ 8655.77517222, 327.98741341, 2444.22611374, 62.07592017],
[-1189.91363345, -2603.24192423, -981.99571763, 288.82852164]])
# Pairwise scatter plots (and per-column distributions) of the 4 PCA-projected columns.
sns.pairplot(pd.DataFrame(Xpca4))
<seaborn.axisgrid.PairGrid at 0x7f84af5c29e0>
# Split first, then learn the 4-component projection from the training rows only.
# Splitting the already-projected Xpca4 would let the test rows influence the PCA axes
# (pca4 was fitted on every row), so the test score would not be a clean hold-out estimate.
# The same random_state / stratify settings are kept, so the row assignment is unchanged.
# NOTE(review): if X_resampled was oversampled before this split, synthetic rows can still
# leak between train and test - confirm.
X_train_pre_pca, X_test_pre_pca, y_train_pca, y_test_pca = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)
pca4_train = PCA(n_components=4).fit(X_train_pre_pca)
X_train_pca = pca4_train.transform(X_train_pre_pca)
X_test_pca = pca4_train.transform(X_test_pre_pca)

# Baseline (untuned) decision tree on the PCA-reduced features.
dTree_p = DecisionTreeClassifier(criterion='gini', random_state=1)
dTree_p.fit(X_train_pca, y_train_pca)
print("Train Score",dTree_p.score(X_train_pca, y_train_pca))
print("Test Score",dTree_p.score(X_test_pca, y_test_pca))
Train Score 1.0 Test Score 0.8344709897610921
# Grid search over decision-tree hyper-parameters on the PCA-reduced training data,
# scored by 5-fold CV accuracy.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [5, 10, 15, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# Seed the estimator: with splitter='random' in the grid (and random tie-breaking even for
# splitter='best'), an unseeded tree makes the selected combination vary between runs.
grid_search_p = GridSearchCV(DecisionTreeClassifier(random_state=1), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_p.fit(X_train_pca, y_train_pca)
Fitting 5 folds for each of 144 candidates, totalling 720 fits
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
param_grid={'criterion': ['gini', 'entropy'],
'max_depth': [5, 10, 15, 20],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'splitter': ['best', 'random']},
scoring='accuracy', verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
param_grid={'criterion': ['gini', 'entropy'],
'max_depth': [5, 10, 15, 20],
'min_samples_leaf': [1, 2, 4],
'min_samples_split': [2, 5, 10],
'splitter': ['best', 'random']},
scoring='accuracy', verbose=1)DecisionTreeClassifier()
DecisionTreeClassifier()
# Best hyper-parameter combination found by the PCA grid search.
# This is the parameter dict, not a fitted model; it rebinds the earlier dTreeP_model.
dTreeP_model = grid_search_p.best_params_
dTreeP_model
{'criterion': 'entropy',
'max_depth': 20,
'min_samples_leaf': 1,
'min_samples_split': 2,
'splitter': 'best'}
# Build the tuned tree from the search result rather than re-typing the values by hand, so
# the model always matches whatever combination the latest search selected.
# random_state is fixed so the fitted tree is reproducible.
dTreehp_p = DecisionTreeClassifier(**grid_search_p.best_params_, random_state=1)
dTreehp_p.fit(X_train_pca, y_train_pca)
print("Hyper Parametertuning (with PCA) - Train Score",dTreehp_p.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) - Test Score",dTreehp_p.score(X_test_pca, y_test_pca))
Hyper Parametertuning (with PCA) - Train Score 0.9987179487179487 Hyper Paramtertuning (with PCA) - Test Score 0.8225255972696246
Q5 E - Display and explain the classification report in detail.
# Confusion matrix of the tuned PCA decision tree on the test split, drawn as a heatmap.
print("Confusion Matrix- PCA HP")
y_predict = dTreehp_p.predict(X_test_pca)
# Rows are the true classes and columns the predicted classes, in the fixed order 0 then 1.
cm = metrics.confusion_matrix(y_test_pca, y_predict, labels=[0, 1])
df_cm = pd.DataFrame(
    cm,
    index=["No", "Yes"],
    columns=["Predicted No", "Predicted Yes"],
)
plt.figure(figsize=(7, 5))
# fmt='g' keeps the cell annotations as plain counts instead of scientific notation.
sns.heatmap(df_cm, annot=True, fmt='g')
Confusion Matrix- PCA HP
<Axes: >
# Per-class precision / recall / F1 of the tuned PCA decision tree on the test split.
predicted_labels_test = dTreehp_p.predict(X_test_pca)
print("Test Performance Matrix: (PCA HP Dtree Model):")
# Print the report instead of displaying `.split("\n")`: the split version shows up as a
# list of raw strings, whereas printing keeps the aligned table layout.
print(metrics.classification_report(y_test_pca, predicted_labels_test))
Test Performance Matrix: (PCA HP Dtree Model):
[' precision recall f1-score support', '', ' 0 0.83 0.81 0.82 293', ' 1 0.81 0.84 0.82 293', '', ' accuracy 0.82 586', ' macro avg 0.82 0.82 0.82 586', 'weighted avg 0.82 0.82 0.82 586', '']
PCA Model with Hypertuning Observation
Class False:
Precision is 0.83
Recall is 0.81
F1-Score is 0.82
Class True:
Precision is 0.81
Recall is 0.84
F1-Score is 0.82
Q5 F - Apply the above steps for all possible models that you have learnt so far.
Let's start with a Logistic Regression model with hyperparameter tuning
# Logistic regression on the PCA features: grid-search the penalty type and the inverse
# regularisation strength C with 5-fold CV accuracy.
# liblinear is used because it supports both the 'l1' and 'l2' penalties in the grid.
clf = LogisticRegression(solver='liblinear')
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
}
grid_search_log = GridSearchCV(
    clf,
    param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)
grid_search_log.fit(X_train_pca, y_train_pca)

# Winning combination and its mean cross-validated accuracy.
best_params = grid_search_log.best_params_
best_accuracy = grid_search_log.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# Scores of the search object on the full train split and on the test split.
print("Hyper Parametertuning (with PCA) (Log reg) - Train Score",grid_search_log.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Log reg) - Test Score",grid_search_log.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 14 candidates, totalling 70 fits
Best Hyperparameters: {'C': 10, 'penalty': 'l1'}
Best Accuracy: 0.5465811965811966
Hyper Parametertuning (with PCA) (Log reg) - Train Score 0.5444444444444444
Hyper Paramtertuning (with PCA) (Log reg) - Test Score 0.5511945392491467
Naive Bayes (there are no hyperparameters here that we still need to try)
# Gaussian Naive Bayes on the PCA features, fitted with its default settings.
navb = GaussianNB().fit(X_train_pca, y_train_pca)
# Accuracy on the train split and on the test split.
print(" (with PCA) (Naive bayers) - Train Score",navb.score(X_train_pca, y_train_pca))
print(" (with PCA) (Naive bayers) - Test Score",navb.score(X_test_pca, y_test_pca))
(with PCA) (Naive bayers) - Train Score 0.5478632478632479 (with PCA) (Naive bayers) - Test Score 0.5853242320819113
Let's try KNN with hyperparameter tuning
# K-nearest-neighbours on the PCA features: grid-search the neighbour count over the odd
# values 3, 5, ..., 49 with 5-fold CV accuracy.
param_grid = {
    'n_neighbors': np.arange(3, 50, 2),
}
grid_search_knn = GridSearchCV(
    KNeighborsClassifier(),
    param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)
grid_search_knn.fit(X_train_pca, y_train_pca)

# Winning neighbour count and its mean cross-validated accuracy.
best_params = grid_search_knn.best_params_
best_accuracy = grid_search_knn.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# Scores of the search object on the full train split and on the test split.
print("Hyper Parametertuning (with PCA) (KNN) - Train Score",grid_search_knn.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (KNN) - Test Score",grid_search_knn.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best Hyperparameters: {'n_neighbors': 3}
Best Accuracy: 0.8371794871794872
Hyper Parametertuning (with PCA) (KNN) - Train Score 0.911965811965812
Hyper Paramtertuning (with PCA) (KNN) - Test Score 0.8191126279863481
Now, lets try for SVM with hyper parameter tuning
# RBF-kernel SVM on the PCA features: randomized search over C and gamma, 5-fold CV accuracy.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ["rbf"],
}
# Only 10 of the 25 C/gamma combinations are sampled, so the search is seeded: without
# random_state a different subset is tried on every run and the reported best
# parameters are not reproducible.
rand_search_svm = RandomizedSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1, n_iter=10, random_state=42)
rand_search_svm.fit(X_train_pca, y_train_pca)

# Best sampled combination and its mean cross-validated accuracy.
best_params = rand_search_svm.best_params_
best_accuracy = rand_search_svm.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# Scores of the search object on the full train split and on the test split.
print("Hyper Parametertuning (with PCA) (SVM) - Train Score",rand_search_svm.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (SVM) - Test Score",rand_search_svm.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Hyperparameters: {'kernel': 'rbf', 'gamma': 0.0001, 'C': 1}
Best Accuracy: 0.7576923076923077
Hyper Parametertuning (with PCA) (SVM) - Train Score 0.997008547008547
Hyper Paramtertuning (with PCA) (SVM) - Test Score 0.7713310580204779
Now, lets try for bagging with hyper parameter tuning
# Bagged decision trees on the PCA features: grid-search the depth of the base tree and
# the number of bagged estimators with 5-fold CV accuracy.
# `estimator` replaces the deprecated `base_estimator` argument (renamed in scikit-learn 1.2
# and removed in 1.4), so the nested parameter key is `estimator__max_depth`.
param_grid = {
    'estimator__max_depth': [2, 3, 4, 5],
    'n_estimators': [10, 50, 100, 200]
}
clf = BaggingClassifier(estimator=DecisionTreeClassifier(), random_state=42)
grid_search_bag = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_bag.fit(X_train_pca, y_train_pca)

# Winning combination and its mean cross-validated accuracy.
best_params = grid_search_bag.best_params_
best_accuracy = grid_search_bag.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# Scores of the search object on the full train split and on the test split.
print("Hyper Parametertuning (with PCA) (bagging) - Train Score",grid_search_bag.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (bagging) - Test Score",grid_search_bag.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 16 candidates, totalling 80 fits
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn(
Best Hyperparameters: {'base_estimator__max_depth': 5, 'n_estimators': 200}
Best Accuracy: 0.6790598290598291
Hyper Parametertuning (with PCA) (bagging) - Train Score 0.7329059829059829
Hyper Paramtertuning (with PCA) (bagging) - Test Score 0.6911262798634812
Now, let's try AdaBoost with hyperparameter tuning.
# Tune an AdaBoost ensemble of decision trees on the PCA-transformed data.
#
# The weak learner is passed via `estimator` (not `base_estimator`): the old
# keyword was renamed in scikit-learn 1.2 and is removed in 1.4, and every
# fit emitted a FutureWarning for it. Nested tree parameters in the grid
# therefore use the `estimator__` prefix.
param_grid = {
    'estimator__max_depth': [2, 3, 4, 5],
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0],
}
clf = AdaBoostClassifier(estimator=DecisionTreeClassifier(), random_state=42)

# Exhaustive search: 4 * 3 * 3 = 36 candidates, each evaluated with 5-fold
# cross-validation on accuracy, using all available cores.
grid_search_ada = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search_ada.fit(X_train_pca, y_train_pca)

# Best parameter combination and its mean cross-validated accuracy.
best_params = grid_search_ada.best_params_
best_accuracy = grid_search_ada.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# With the default refit=True, `score` evaluates the best estimator (refit on
# the full training set) using the `scoring` metric given above.
print("Hyper Parametertuning (with PCA) (Ada boosting) - Train Score",grid_search_ada.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Ada boosting) - Test Score",grid_search_ada.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 36 candidates, totalling 180 fits
/home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. 
warnings.warn( /home/codespace/.local/lib/python3.10/site-packages/sklearn/ensemble/_base.py:156: FutureWarning: `base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4. warnings.warn(
Best Hyperparameters: {'base_estimator__max_depth': 5, 'learning_rate': 1.0, 'n_estimators': 200}
Best Accuracy: 0.8414529914529915
Hyper Parametertuning (with PCA) (Ada boosting) - Train Score 1.0
Hyper Paramtertuning (with PCA) (Ada boosting) - Test Score 0.8447098976109215
Now, let's try Gradient Boosting with hyperparameter tuning
# Randomized hyperparameter search for Gradient Boosting on the PCA-reduced data.
#
# The grid below holds 3**5 = 243 combinations; RandomizedSearchCV evaluates only
# n_iter=30 of them, each with 5-fold cross-validation scored by accuracy.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 0.9, 1.0],
    'min_samples_split': [2, 5, 10],
}

# random_state on the search itself (not only on the estimator) fixes WHICH 30
# candidates are sampled, so the selected hyperparameters and the reported
# scores are reproducible from run to run.
# NOTE: results recorded from earlier, unseeded runs may differ from a rerun.
rapid_search_gra = RandomizedSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid,
    n_iter=30,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
    random_state=42,
)
rapid_search_gra.fit(X_train_pca, y_train_pca)

# best_score_ is the mean cross-validated accuracy of the winning candidate.
best_params = rapid_search_gra.best_params_
best_accuracy = rapid_search_gra.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# .score() on the search object evaluates the refitted best estimator with the
# configured scoring (accuracy) on the data passed in.
print("Hyper Parametertuning (with PCA) (Gradient boosting) - Train Score",rapid_search_gra.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Gradient boosting) - Test Score",rapid_search_gra.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best Hyperparameters: {'subsample': 0.9, 'n_estimators': 200, 'min_samples_split': 2, 'max_depth': 5, 'learning_rate': 0.1}
Best Accuracy: 0.8170940170940172
Hyper Parametertuning (with PCA) (Gradient boosting) - Train Score 0.9931623931623932
Hyper Paramtertuning (with PCA) (Gradient boosting) - Test Score 0.8156996587030717
Now, let's try Random Forest with hyperparameter tuning
# Randomized hyperparameter search for Random Forest on the PCA-reduced data.
#
# Integer hyperparameters are drawn from scipy.stats.randint distributions
# (upper bound exclusive); max_depth is picked from an explicit list that also
# allows None, i.e. unlimited depth.
param_grid = {
    'n_estimators': randint(50, 200),
    'max_depth': [None] + list(range(5, 20)),
    'min_samples_split': randint(2, 11),
    'min_samples_leaf': randint(1, 5),
}

# random_state on the search seeds the sampling of the 20 candidates from the
# distributions above. Without it every run tries different candidates, so the
# "best" model (the one that is later pickled) is not reproducible.
# NOTE: results recorded from earlier, unseeded runs may differ from a rerun.
rand_search_rand = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    n_iter=20,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
    random_state=42,
)
rand_search_rand.fit(X_train_pca, y_train_pca)

# best_score_ is the mean cross-validated accuracy of the winning candidate.
best_params = rand_search_rand.best_params_
best_accuracy = rand_search_rand.best_score_
print("Best Hyperparameters:", best_params)
print("Best Accuracy:", best_accuracy)

# .score() on the search object evaluates the refitted best estimator with the
# configured scoring (accuracy) on the data passed in.
print("Hyper Parametertuning (with PCA) (Random Forest) - Train Score",rand_search_rand.score(X_train_pca, y_train_pca))
print("Hyper Paramtertuning (with PCA) (Random Forest) - Test Score",rand_search_rand.score(X_test_pca, y_test_pca))
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Hyperparameters: {'max_depth': 17, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 194}
Best Accuracy: 0.8602564102564102
Hyper Parametertuning (with PCA) (Random Forest) - Train Score 0.9914529914529915
Hyper Paramtertuning (with PCA) (Random Forest) - Test Score 0.8515358361774744
Q6 A- Display and compare all the models designed with their train and test accuracies.
Q6 B - Select the final best trained model along with your detailed comments for selecting this model.
The best model from the comparison is the Random Forest,
with train score = 0.99
and test score = 0.85.
This is the best-performing model in the comparison, so we can use it in production.
Q6 C - Pickle the selected model for future use.
# Persist the fitted Random Forest search object (the whole RandomizedSearchCV,
# not only its best estimator) so it can be reloaded later for prediction.
# Only unpickle this file from a trusted location: pickle can execute code on load.
with open('prod_model_rand_forest.pkl', 'wb') as model_file:
    pickle.dump(rand_search_rand, model_file)
Q6 D - Write your conclusion on the results.
We performed several activities in this project: cleaning the data, feature scaling (with feature elimination to address multicollinearity among the independent features), reducing dimensions using PCA (principal component analysis), trying several different supervised algorithms with hyperparameter tuning for each (except Naive Bayes), selecting Random Forest as the best model based on its score and performance, and saving (pickling) it for future use in production. This project gives a very detailed analysis of each phase, from feature scaling to model tuning.